"""Scraper notes.

Reference videos:
    https://www.youtube.com/watch?v=VkD2Wx14p-U&list=PLwDQt7s1o9J4DqlcIaDLJPnP902mZs0Js&index=5&t=6s
    https://www.youtube.com/watch?v=Z2QvqN_RX6M

Listing URL prefix:
    url = 'https://aregsr.vip/forum-103-'

Board: 网盘下载区 (net-disk download area), 7628 pages.
"""
from pathlib import Path
import xlrd
import os
import xlwt
import random
import xlutils
import lxml
import pandas as pd
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import requests
from requests.cookies import RequestsCookieJar
import urllib3
import re
import threading
import xlsxwriter

# Semaphore with 10 permits; it bounds how many threads may run at once.
thread_lock = threading.Semaphore(value=10)

import json

# The session below has certificate verification switched off, so mute the
# InsecureRequestWarning that urllib3 would otherwise emit.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

s = requests.session()
s.verify = False
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"}

# Fill a cookie jar from cookies.txt, which is parsed as JSON; every entry
# must provide a 'name' and a 'value'.
jar = RequestsCookieJar()
with open("cookies.txt", "r") as fp:
    cookies = json.load(fp)
for cookie in cookies:
    jar.set(cookie['name'], cookie['value'])

url = 'https://aregsr.vip/forum-103-'

# First output workbook together with its four sheets.
workbook = xlsxwriter.Workbook('2700-wpxz2.xls')
worksheet, worksheet2, worksheet3, worksheet4 = (
    workbook.add_worksheet(sheet_name)
    for sheet_name in ('sheet1', 'sheet2', 'sheet3', 'sheet4')
)
# Paging module: walk the forum listing pages, collect the thread links of
# each page and scrape every thread in a worker thread.

# Names of the sheets created in every output workbook.
_SHEET_NAMES = ('sheet1', 'sheet2', 'sheet3', 'sheet4')

# Captures the text between `data-clipboard-text=` and the first `https`.
_TITLE_RE = re.compile(r'data-clipboard-text=(.*?)https')

# Captures the href of each thread link on a listing page.
_THREAD_LINK_RE = re.compile(r'</a>]</em> <a href="(.*?)".*?')

# Seconds each worker pauses before handing its permit back; this throttles
# the request rate against the site.
_WORKER_PAUSE = 2.5

# XlsxWriter does not document its worksheets as thread-safe, so cell writes
# coming from the worker threads are serialized.
_sheet_write_lock = threading.Lock()

# Password rules, tried in order: (console label, pattern, value to record).
# A value of None means "record the matched text itself".
_PASSWORD_RULES = (
    ('1', re.compile(r'gZu2(.*?)3t6'), 'gZu2(b1I0-Dc&3t6'),
    ('2', re.compile(r'：8888'), '8888'),
    ('3', re.compile(r'：1024'), '1024'),
    ('4', re.compile(r'：2046'), '2046'),
    ('5', re.compile(r'g2j0s4g8rhs'), 'g2j0s4g8rhs'),
    ('6', re.compile(r'gZu2(.*?)8t5'), 'gZu2(b1I0-Dc&8t5'),
    ('7', re.compile(r'：118'), '118'),
    ('19', re.compile(r'sex8.cc'), None),
    ('16', re.compile(r'【解压密码】：(.*)'), None),
    ('21', re.compile(r'【文件密码】：(.*)'), None),
    ('23', re.compile(r'解压密码(.*)'), None),
)

# Download-link rules, tried in order; the matched text is what gets recorded.
_LINK_RULES = (
    ('8', re.compile(r'http://www.xun-niu.com/(.*?)html'), None),
    ('9', re.compile(r'http://www.ibuspan.com/(.*?)html'), None),
    ('10', re.compile(r'http://www.onstclouds.com/(.*?)html'), None),
    ('11', re.compile(r'https://www.567yunpan.com/(.*?)html'), None),
    ('12', re.compile(r'http://www.buswok.com/(.*?)html'), None),
    ('13', re.compile(r'http://www.xueqiupan.com/(.*?)html'), None),
    ('14', re.compile(r'http://www.expfile.com/(.*?)html'), None),
    ('15', re.compile(r'http://www.tadaigou.com/(.*?)html'), None),
    ('17', re.compile(r'http://share4uspace.com(.*?)"'), None),
    ('18', re.compile(r'http://www.upfilex.com/(.*?)html'), None),
    ('19', re.compile(r'http://www.567pan.com/(.*?)html'), None),
    ('20', re.compile(r'http://www.xfpan.cc/(.*?)html'), None),
    ('22', re.compile(r'https://down.fruitpan.com/(.*?)"'), None),
)


def _open_workbook(filename):
    """Create a workbook with the four standard sheets.

    Returns a list ``[workbook, sheet1, sheet2, sheet3, sheet4]``.
    """
    book = xlsxwriter.Workbook(filename)
    return [book] + [book.add_worksheet(name) for name in _SHEET_NAMES]


def _extract_title(html):
    """Return the title found in the copy button markup, or None.

    The captured text is stripped of double quotes and of its last character
    (the comma that precedes the link), and '/' is replaced by '，'.
    """
    match = _TITLE_RE.search(html)
    if match is None:
        return None
    return match.group(1).replace('"', '')[:-1].replace('/', '，')


def _first_match(rules, html):
    """Return ``(label, value)`` for the first rule found in *html*, else None."""
    for label, pattern, fixed_value in rules:
        match = pattern.search(html)
        if match:
            return label, (match.group() if fixed_value is None else fixed_value)
    return None


def _log_failure(exc, url5):
    """Report a failed thread page on the console and append it to 异常2.txt."""
    print('密码和url错误')
    with open('异常2.txt', 'a', encoding='utf-8') as fp:
        fp.write(str(exc) + url5 + '\n')


# Open each thread page and record what is found in it.
def get_alltitle(url5, k):
    """Scrape one thread page and store the results in row *k*, column ``i``.

    Args:
        url5: thread link relative to ``https://aregsr.vip/``.
        k: row index used for every cell written for this thread.

    Writes the absolute URL to ``worksheet4``, the title to ``worksheet2``,
    the password to ``worksheet`` and the download link to ``worksheet3``.
    The column is the module-level page counter ``i``; the driver loop joins
    all workers of a page before it changes ``i`` or swaps the workbook.

    The caller must have acquired ``thread_lock``; the permit is always
    released here, even when scraping fails.
    """
    print(url5)
    url3 = 'https://aregsr.vip/' + url5
    try:
        # Save the page URL.
        with _sheet_write_lock:
            worksheet4.write(k, i, url3)

        try:
            r = s.get(url3, headers=headers, cookies=jar, timeout=10)
        except requests.RequestException as e:
            # Without a response there is nothing to parse; log the real
            # network error instead of failing later on a missing response.
            print('没标题')
            _log_failure(e, url5)
            return
        r.encoding = "utf-8"
        html2 = r.text

        title = _extract_title(html2)
        if title is None:
            print('没标题')
        else:
            with _sheet_write_lock:
                worksheet2.write(k, i, title)
            print('标题成功')

        password = _first_match(_PASSWORD_RULES, html2)
        if password is None:
            print('没有密码')
        else:
            with _sheet_write_lock:
                worksheet.write(k, i, password[1])
            print(password[0] + '成功')

        link = _first_match(_LINK_RULES, html2)
        if link is None:
            print('没有url')
        else:
            with _sheet_write_lock:
                worksheet3.write(k, i, link[1])
            print(link[0] + '成功')
    except Exception as e:
        # Thread boundary: record the failure and let the crawl continue.
        _log_failure(e, url5)
    finally:
        time.sleep(_WORKER_PAUSE)
        # Work finished: hand the permit back so the next thread may start.
        thread_lock.release()


for i in range(2601, 3601):

    if i % 100 == 0:
        # All workers of the previous pages have been joined, so nothing is
        # still writing into the workbook that gets closed here.
        workbook.close()

        # NOTE(review): page i itself is stored in the workbook opened here
        # (page 2700 ends up in '2800-wpxz2.xls') - confirm this is intended.
        # NOTE(review): xlsxwriter produces the XLSX format; the '.xls'
        # extension is kept as is, but Excel may warn about the mismatch.
        workbook, worksheet, worksheet2, worksheet3, worksheet4 = _open_workbook(
            str(i + 100) + '-wpxz2.xls')

    workers = []
    try:
        url = "https://aregsr.vip/forum-103-{}.html".format(i)

        print(url)

        r = s.get(url, headers=headers, cookies=jar, timeout=10)
        r.encoding = "utf-8"
        html1 = r.text
        # </a>]</em> <a href="thread-3585492-1-831.html" onclick
        img_url = _THREAD_LINK_RE.findall(html1)
        url2 = img_url

        if not url2:
            # A listing page without any thread link is reported as a
            # paging error.
            print('翻页出错')

        # Visit every thread of this page.
        for k, thread_url in enumerate(url2):
            # Take a permit; get_alltitle gives it back when it is done.
            thread_lock.acquire()
            t = threading.Thread(target=get_alltitle, args=(thread_url, k))
            try:
                t.start()
            except RuntimeError:
                # The worker never ran, so it cannot release the permit.
                thread_lock.release()
                raise
            workers.append(t)

            time.sleep(0.1)
    except Exception as exc:
        print('翻页出错', exc)
    finally:
        # Wait for this page's workers before `i` or the workbook changes.
        for t in workers:
            t.join()

workbook.close()




























